In [1]:
from __future__ import absolute_import, division, print_function
# Value subtracted from every grayscale pixel in preprocess_image.
MEAN_PIXEL_VALUE = 128
# Divisor applied to every pixel after the mean subtraction in preprocess_image.
PIXEL_STANDARD_DEVIATION = 80
# Side length used when batches are reshaped to (batch, SEGMENT_SIZE, SEGMENT_SIZE, 1)
# and when the network's input placeholder is declared.
SEGMENT_SIZE = 128
In [2]:
import numpy as np
def preprocess_image(image):
    """Normalises an image for use as network input.

    The image is converted to single-channel grayscale ('L' mode), shifted by
    MEAN_PIXEL_VALUE and then scaled by PIXEL_STANDARD_DEVIATION.

    Returns the result as an np.array of 16-bit floats, which keeps the memory
    footprint of a fully loaded data set small.
    """
    grayscale = image.convert('L')
    pixels = np.array(grayscale).astype(np.float16)
    return (pixels - MEAN_PIXEL_VALUE) / PIXEL_STANDARD_DEVIATION
In [3]:
from os import walk
from PIL import Image
def load_images(dir_path, n_max=-1):
    """Loads the images found directly inside dir_path into RAM.

    Args:
        dir_path: Directory to read from. A trailing path separator is
            optional. Sub-directories are not visited.
        n_max: Maximum number of images to load. With the default of -1 the
            cutoff is never reached, so every file is loaded.

    Returns:
        A list holding the result of preprocess_image for every loaded file.
    """
    # Imported locally because the notebook only pulls `walk` out of `os`.
    from os.path import join as join_path

    images = []
    # The first item produced by walk() describes dir_path itself; index 2 of
    # that tuple is the list of file names it contains.
    for filename in next(walk(dir_path))[2]:
        if len(images) == n_max:
            break
        # Image.open only reads the header and keeps the file open. The
        # context manager releases the handle once the pixels are copied.
        with Image.open(join_path(dir_path, filename)) as image_file:
            images.append(preprocess_image(image_file))
    return images
In [4]:
# Load the training set. Images without text come first and are labelled
# [1, 0]; images with text follow and are labelled [0, 1].
training_images = load_images('../data/train/no-text/')
n_no_text = len(training_images)
training_images += load_images('../data/train/text/')
n_text = len(training_images) - n_no_text

training_labels = np.array([[1, 0]] * n_no_text + [[0, 1]] * n_text)
training_images = np.array(training_images)
In [5]:
# Load the validation set. Images without text come first and are labelled
# [1, 0]; images with text follow and are labelled [0, 1].
validation_images = load_images('../data/valid/no-text/')
n_no_text = len(validation_images)
validation_images += load_images('../data/valid/text/')
n_text = len(validation_images) - n_no_text

validation_labels = np.array([[1, 0]] * n_no_text + [[0, 1]] * n_text)
validation_images = np.array(validation_images)
In [6]:
def get_batch(batch_size, use_training_set=True):
    """Draws batch_size distinct random examples from one of the data sets.

    The training set is used when use_training_set is true, the validation
    set otherwise.

    Returns a tuple (images, labels) for the drawn examples.
    """
    if use_training_set:
        source_images, source_labels = training_images, training_labels
    else:
        source_images, source_labels = validation_images, validation_labels

    # The final argument (replace=False) keeps the picked indexes unique.
    picked = np.random.choice(np.arange(len(source_images)), batch_size, False)
    return source_images[picked], source_labels[picked]
In [7]:
from matplotlib import pyplot as plt
%matplotlib inline
def fill_feed_dict(pl_images, pl_labels, pl_learning_rate, pl_keep_prob, batch_size, learning_rate, keep_prob):
    """Builds the feed_dict that TensorFlow uses for one train/validation step.

    A random batch is drawn and stored under the given placeholder keys,
    together with the hyperparameters that change during training:
    * The learning_rate
    * The probability that the dropout layer(s) do NOT drop a given value
      (keep_prob) -- currently not fed, see the note in the dict below.

    A learning_rate of 0 selects the validation set; any other value selects
    the training set.

    Returns the feed_dict.
    """
    batch_images, batch_labels = get_batch(batch_size, learning_rate != 0)
    return {
        pl_images: np.reshape(batch_images, (batch_size, SEGMENT_SIZE, SEGMENT_SIZE, 1)),
        pl_labels: np.reshape(batch_labels, (batch_size, 2)),
        pl_learning_rate: learning_rate,
        # pl_keep_prob: keep_prob  # Disabled so that the graph can run on Android
    }
# Smoke test: plain strings stand in for the placeholders so that the batch can
# be looked up by name, then every drawn image is shown with its label as title.
test_feed_dict = fill_feed_dict('images', 'labels', 'learning_rate', 'keep_prob', 2, 0.01, 1.0)
for i, sample in enumerate(test_feed_dict['images']):
    plt.figure()
    plt.title(str(test_feed_dict['labels'][i]))
    plt.imshow(sample.squeeze(), cmap=plt.cm.gray)
In [8]:
import tensorflow as tf
# Factor applied to the L2 norm of every variable before it is added to the loss.
WEIGHT_PENALTY_RATE = 3e-3


def weight_variable(shape, stddev):
    """Creates a weight variable initialised from a truncated normal.

    The variable's L2 loss, scaled by WEIGHT_PENALTY_RATE, is registered in
    the 'losses' collection.

    Returns the created variable.
    """
    weights = tf.Variable(tf.truncated_normal(shape, stddev=stddev), name='weights')
    penalty = tf.mul(tf.nn.l2_loss(weights), WEIGHT_PENALTY_RATE)
    tf.add_to_collection('losses', penalty)
    return weights
def bias_variable(shape, init):
    """Creates a bias variable with every element set to init.

    The variable's L2 loss, scaled by WEIGHT_PENALTY_RATE, is registered in
    the 'losses' collection.

    Returns the created variable.
    """
    biases = tf.Variable(tf.constant(init, shape=shape), name='biases')
    penalty = tf.mul(tf.nn.l2_loss(biases), WEIGHT_PENALTY_RATE)
    tf.add_to_collection('losses', penalty)
    return biases
def conv2d(x, W):
    """Convolves x with the filters W using a stride of 2 and 'SAME' padding.

    Returns the output tensor of the convolution.
    """
    stride_of_two = [1, 2, 2, 1]
    return tf.nn.conv2d(x, W, strides=stride_of_two, padding='SAME')
def max_pool_2x2(x):
    """Max-pools x over 2x2 windows with a stride of 2 and 'SAME' padding.

    Returns the pooled tensor.
    """
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')
In [9]:
# Start from an empty graph so that re-running this cell does not register the
# same terms in the 'losses' collection more than once.
tf.reset_default_graph()

x = tf.placeholder(tf.float32, shape=[None, SEGMENT_SIZE, SEGMENT_SIZE, 1], name='input')
y_ = tf.placeholder(tf.float32, shape=[None, 2], name='ground_truth')

# conv1, mp1, conv2 and mp2 each use a stride of 2 with 'SAME' padding, so the
# side length has been divided by 2 ** 4 when the fully connected layer is reached.
n = SEGMENT_SIZE // (2 ** 4)

with tf.name_scope('conv1'):
    W_conv1 = weight_variable([5, 5, 1, 32], 1e-4)
    b_conv1 = bias_variable([32], 0.1)
    h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)

with tf.name_scope('mp1'):
    h_pool1 = max_pool_2x2(h_conv1)

with tf.name_scope('conv2'):
    W_conv2 = weight_variable([5, 5, 32, 64], 1e-4)
    b_conv2 = bias_variable([64], 0.1)
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

with tf.name_scope('mp2'):
    h_pool2 = max_pool_2x2(h_conv2)
    h_pool2_flat = tf.reshape(h_pool2, [-1, n * n * 64])

with tf.name_scope('fc1'):
    W_fc1 = weight_variable([n * n * 64, 512], 0.04)
    b_fc1 = bias_variable([512], 0.1)
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout is left out because the Android version of TF doesn't like it.
# To re-enable it, restore the block below and feed pl_keep_prob again:
# with tf.name_scope('fc1_dropout'):
#     pl_keep_prob = tf.placeholder(tf.float32, name='keep_prob')
#     h_fc1_drop = tf.nn.dropout(h_fc1, pl_keep_prob)
pl_keep_prob = 'foo'  # dummy value; only passed through to fill_feed_dict

with tf.name_scope('fc2'):
    W_fc2 = weight_variable([512, 2], 0.1)
    b_fc2 = bias_variable([2], 0.1)
    o_fc2 = tf.matmul(h_fc1, W_fc2) + b_fc2

# Created outside any name scope so that the node is called exactly 'output'.
y_conv = tf.nn.softmax(o_fc2, name='output')

with tf.name_scope('cross_entropy_mean'):
    # The probabilities are clipped to [1e-10, 1] before the log is taken.
    clipped_probabilities = tf.clip_by_value(y_conv, 1e-10, 1)
    cross_entropy_mean = -tf.reduce_mean(y_ * tf.log(clipped_probabilities))
    tf.add_to_collection('losses', cross_entropy_mean)
In [10]:
# The total loss is the sum of everything registered in the 'losses' collection.
with tf.name_scope('loss'):
    loss_terms = tf.get_collection('losses')
    loss = tf.add_n(loss_terms)

# The learning rate is fed at every step so that it can be changed during training.
pl_learning_rate = tf.placeholder(tf.float32, shape=[])
optimizer = tf.train.MomentumOptimizer(pl_learning_rate, momentum=0.9)
train_step = optimizer.minimize(loss)
In [11]:
# Accuracy is the fraction of examples whose highest-scoring output matches
# the position of the largest entry in the ground-truth label.
with tf.name_scope('accuracy'):
    predicted_class = tf.arg_max(y_conv, 1)
    true_class = tf.arg_max(y_, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
In [12]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
writer = tf.train.SummaryWriter('./training_logs/', sess.graph_def)
sess.run(tf.initialize_all_variables())

# Training parameters
test_interval = 600           # A validation run happens every test_interval steps
test_iters = 300              # Number of validation batches per validation run
display_interval = 50         # Affects how often the training loss is printed
n_iters = test_interval * 60  # The total number of training steps
batch_size = 100

# Learning rate parameters
learning_rate = 0.003
step_size = test_interval * 30  # The learning rate is updated every step_size steps
lr_step_rate = 0.3              # Factor the learning rate is multiplied by

# These are plotted later to get feedback on how the training went
all_validation_accuracies = []
all_validation_losses = []
all_training_losses = []

for i in range(n_iters):
    if i % test_interval == 0:
        # A learning rate of 0 makes fill_feed_dict draw from the validation set.
        validation_results = [
            sess.run([accuracy, loss],
                     feed_dict=fill_feed_dict(x, y_, pl_learning_rate, pl_keep_prob, batch_size, 0, 1))
            for j in range(test_iters)
        ]
        mean_validation_accuracy = np.mean([batch_accuracy for batch_accuracy, batch_loss in validation_results])
        mean_validation_loss = np.mean([batch_loss for batch_accuracy, batch_loss in validation_results])
        print('Validation accuracy: ' + str(mean_validation_accuracy))
        print('Validation loss: ' + str(mean_validation_loss))
        all_validation_accuracies.append(mean_validation_accuracy)
        all_validation_losses.append(mean_validation_loss)

    feed_dict = fill_feed_dict(x, y_, pl_learning_rate, pl_keep_prob, batch_size, learning_rate, 0.5)
    _, training_loss = sess.run([train_step, loss], feed_dict=feed_dict)

    if i % display_interval == 0:
        print('Training loss: ' + str(training_loss))
        print('========')
        all_training_losses.append(training_loss)

    if i != 0 and i % step_size == 0:
        learning_rate = learning_rate * lr_step_rate
        print('Updated learning rate: ' + str(learning_rate))
        print('========')
In [13]:
from matplotlib import pyplot as plt
import matplotlib as mpl
# Plot the recorded training loss, validation loss and validation accuracy
# against the training step at which each value was recorded.
plt.style.use('bmh')
mpl.rcParams['font.family'] = 'serif'
mpl.rcParams['font.size'] = 12
mpl.rcParams['figure.figsize'] = [10, 7]

# One training loss is stored every display_interval steps, starting at step 0.
completed_iters = display_interval * len(all_training_losses)
ind_training = range(0, completed_iters, display_interval)

# One validation result is stored every test_interval steps, starting at step 0.
# The axis is derived from the validation series itself so that both always
# have the same length, even when training was interrupted part-way through.
ind_validation = range(0, test_interval * len(all_validation_losses), test_interval)

plt.plot(ind_training, all_training_losses, c='#7fcdbb', label='Training loss')
plt.plot(ind_validation, all_validation_losses, c='#2c7fb8', label='Validation loss')
plt.plot(ind_validation, all_validation_accuracies, c='#ff851b', label='Validation accuracy')
plt.xlabel('Training steps')
plt.legend(loc='best')  # without a legend the three curves cannot be told apart
plt.show()
In [14]:
# Store the trained variables in a checkpoint and write the graph definition
# to 'input_graph.pb' in the current directory.
saver = tf.train.Saver()
saver.save(sess, './saved_checkpoint', global_step=0, latest_filename='checkpoint_state')

graph_definition = sess.graph.as_graph_def()
tf.train.write_graph(graph_definition, '.', 'input_graph.pb')
In [15]:
from freeze_graph import freeze_graph
# Combine the written graph and the checkpoint into one file for the app.
# NOTE(review): the local names below follow the parameter order of
# TensorFlow's freeze_graph tool -- confirm against the imported module.
input_graph_path = './input_graph.pb'
input_saver_path = ''
input_is_binary = False
checkpoint_path = './saved_checkpoint-0'
output_node_names = 'output'
restore_op_name = 'save/restore_all'
filename_tensor_name = 'save/Const:0'
output_graph_path = '../second-sight/assets/tensorflow_text_detector.pb'
clear_devices = False
initializer_nodes = None

freeze_graph(input_graph_path, input_saver_path, input_is_binary, checkpoint_path,
             output_node_names, restore_op_name,
             filename_tensor_name, output_graph_path,
             clear_devices, initializer_nodes)
In [ ]:
%%bash
# Move up to the parent directory, then run `bazel mobile-install` on the
# //second-sight:second-sight target; --start_app is passed so that the app
# is launched afterwards.
cd ..
bazel mobile-install //second-sight:second-sight --start_app